** Extracting from QWI, convert to dta
** JHL

**	Downloaded from public-use QWI: http://lehd.ces.census.gov/pub/
**	Unzip in linux: for z in *.gz; do gzip -d $z; done
**	QWI naming conventions: 
**	qwi_state_sa(sex-age)_firm(size,age)_geography(county,msa,state,etc.)_industry(naics_digit)_ownership(oslp(all) vs op(private))_seasonaladj(u)

*************************************
** Set up workspace
*************************************
version 14.0
clear all
set more off

** Start timer 1 here so that the timer off / timer list calls at the end
** of this script report the elapsed run time (they had no matching timer on)
timer clear 1
timer on 1

** path_home is a global macro that must be defined before this script runs
cd "${path_home}"
** Make project ado-files (relative to path_home) visible to Stata
adopath + ../programs

** log using "${path_log}/c05_qwi_extract_from_raw", text replace

*************************************
** [1] Loop over states (or run one SLURM array task per state): keep Nielsen industries within each file
*************************************

** Check date of data pull 
** shell stat "${path_big_dta}/qwi/latest"
** All downloads and outputs are written to the QWI working directory
cd "${path_big_dta}/qwi/latest"

if 1 == 1 {

	** State postal codes (51 entries). Built once, outside the loop:
	** tokenize stores them in positional macros 1-51, so inside the loop
	** the double-dereferenced loop index expands to the i-th state code.
	local states "ak al ar az ca co ct dc de fl ga hi ia id il in ks ky la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa ri sc sd tn tx ut va vt wa wi wv wy"
	tokenize "`states'"

	** Array: 1-51
	** local i : environment SLURM_ARRAY_TASK_ID 

	foreach i of numlist 1/51 {

		** State code for this iteration and the common file stem
		local st "``i''"
		local stub "qwi_`st'_sa_f_gc_n4_op_u"

		** Extract raw directly from QWI public directory 
		if 1 == 1 {

			** NOTE(review): URL uses plain http and the pub path; confirm the server still serves it
			copy "http://lehd.ces.census.gov/pub/`st'/latest_release/`stub'.csv.gz" "`stub'.csv.gz", replace

			** Unzip and erase gz file.
			** -f forces overwrite: without it gzip refuses to replace a csv left
			** over from an earlier run, so the stale csv would be imported and
			** the freshly downloaded gz file would stay on disk.
			shell gzip -df "`stub'.csv.gz"

		}

		** Convert csv to dta 
		if 1 == 1 {
			import delimited using "`stub'.csv", clear
			save "`stub'", replace
		}

		** Extract Nielsen industries + restaurants
		if 1 == 1 {
			use "`stub'", clear
			* Keep retail and restaurants for robustness checks 
				* Extension: Can keep a few more high MW share industries: 6243 voc rehab 4533 used merch stores 111 crop prod 7212 7213 rec parks, room and boarding houses 6244 child day care (6231 nursing home)
			keep if inlist(industry, 4451, 4461, 4471, 4521, 4529, 7223, 7224, 7225)
			** Suffix _ns marks the industry-restricted subset
			save "`stub'_ns", replace
		}

	}
}

*************************************
** Close workspace
*************************************
** Stop timer 1 and display its accumulated time
timer off 1
timer list 1
** log close